In [1]:
# Importing the libraries  
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from warnings import filterwarnings 
filterwarnings("ignore")
In [2]:
import os

# Path to the input CSV. Defaults to the original local download location;
# set the MOBILE_PRICE_CSV environment variable to point at the file on
# another machine instead of editing this cell.
DATA_PATH = os.environ.get(
    "MOBILE_PRICE_CSV",
    r"C:\Users\laxma\Downloads\Mobile_Price_Classification.csv",
)
data = pd.read_csv(DATA_PATH)
In [3]:
# (rows, columns) of the loaded frame.
data.shape
Out[3]:
(2000, 21)
In [4]:
# Preview the first five rows.
data.head(n=5)
Out[4]:
battery_power blue clock_speed dual_sim fc four_g int_memory m_dep mobile_wt n_cores ... px_height px_width ram sc_h sc_w talk_time three_g touch_screen wifi price_range
0 842 0 2.2 0 1 0 7 0.6 188 2 ... 20 756 2549 9 7 19 0 0 1 1
1 1021 1 0.5 1 0 1 53 0.7 136 3 ... 905 1988 2631 17 3 7 1 1 0 2
2 563 1 0.5 1 2 1 41 0.9 145 5 ... 1263 1716 2603 11 2 9 1 1 0 2
3 615 1 2.5 0 0 0 10 0.8 131 6 ... 1216 1786 2769 16 8 11 1 0 0 2
4 1821 1 1.2 0 13 1 44 0.6 141 2 ... 1208 1212 1411 8 2 15 1 1 0 1

5 rows × 21 columns

In [5]:
# Preview the last five rows.
data.tail(n=5)
Out[5]:
battery_power blue clock_speed dual_sim fc four_g int_memory m_dep mobile_wt n_cores ... px_height px_width ram sc_h sc_w talk_time three_g touch_screen wifi price_range
1995 794 1 0.5 1 0 1 2 0.8 106 6 ... 1222 1890 668 13 4 19 1 1 0 0
1996 1965 1 2.6 1 0 0 39 0.2 187 4 ... 915 1965 2032 11 10 16 1 1 1 2
1997 1911 0 0.9 1 1 1 36 0.7 108 8 ... 868 1632 3057 9 1 5 1 1 0 3
1998 1512 0 0.9 0 4 1 46 0.1 145 5 ... 336 670 869 18 10 19 1 1 1 0
1999 510 1 2.0 1 5 1 45 0.9 168 6 ... 483 754 3919 19 4 2 1 1 1 3

5 rows × 21 columns

In [6]:
# Number of missing values in each column (isna is the same check as isnull).
data.isna().sum()
Out[6]:
battery_power    0
blue             0
clock_speed      0
dual_sim         0
fc               0
four_g           0
int_memory       0
m_dep            0
mobile_wt        0
n_cores          0
pc               0
px_height        0
px_width         0
ram              0
sc_h             0
sc_w             0
talk_time        0
three_g          0
touch_screen     0
wifi             0
price_range      0
dtype: int64
In [7]:
# Summary statistics (count, mean, std, min, quartiles, max) for every column.
data.describe()
Out[7]:
battery_power blue clock_speed dual_sim fc four_g int_memory m_dep mobile_wt n_cores ... px_height px_width ram sc_h sc_w talk_time three_g touch_screen wifi price_range
count 2000.000000 2000.0000 2000.000000 2000.000000 2000.000000 2000.000000 2000.000000 2000.000000 2000.000000 2000.000000 ... 2000.000000 2000.000000 2000.000000 2000.000000 2000.000000 2000.000000 2000.000000 2000.000000 2000.000000 2000.000000
mean 1238.518500 0.4950 1.522250 0.509500 4.309500 0.521500 32.046500 0.501750 140.249000 4.520500 ... 645.108000 1251.515500 2124.213000 12.306500 5.767000 11.011000 0.761500 0.503000 0.507000 1.500000
std 439.418206 0.5001 0.816004 0.500035 4.341444 0.499662 18.145715 0.288416 35.399655 2.287837 ... 443.780811 432.199447 1084.732044 4.213245 4.356398 5.463955 0.426273 0.500116 0.500076 1.118314
min 501.000000 0.0000 0.500000 0.000000 0.000000 0.000000 2.000000 0.100000 80.000000 1.000000 ... 0.000000 500.000000 256.000000 5.000000 0.000000 2.000000 0.000000 0.000000 0.000000 0.000000
25% 851.750000 0.0000 0.700000 0.000000 1.000000 0.000000 16.000000 0.200000 109.000000 3.000000 ... 282.750000 874.750000 1207.500000 9.000000 2.000000 6.000000 1.000000 0.000000 0.000000 0.750000
50% 1226.000000 0.0000 1.500000 1.000000 3.000000 1.000000 32.000000 0.500000 141.000000 4.000000 ... 564.000000 1247.000000 2146.500000 12.000000 5.000000 11.000000 1.000000 1.000000 1.000000 1.500000
75% 1615.250000 1.0000 2.200000 1.000000 7.000000 1.000000 48.000000 0.800000 170.000000 7.000000 ... 947.250000 1633.000000 3064.500000 16.000000 9.000000 16.000000 1.000000 1.000000 1.000000 2.250000
max 1998.000000 1.0000 3.000000 1.000000 19.000000 1.000000 64.000000 1.000000 200.000000 8.000000 ... 1960.000000 1998.000000 3998.000000 19.000000 18.000000 20.000000 1.000000 1.000000 1.000000 3.000000

8 rows × 21 columns

In [8]:
# Column dtypes, non-null counts and memory usage.
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 21 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   battery_power  2000 non-null   int64  
 1   blue           2000 non-null   int64  
 2   clock_speed    2000 non-null   float64
 3   dual_sim       2000 non-null   int64  
 4   fc             2000 non-null   int64  
 5   four_g         2000 non-null   int64  
 6   int_memory     2000 non-null   int64  
 7   m_dep          2000 non-null   float64
 8   mobile_wt      2000 non-null   int64  
 9   n_cores        2000 non-null   int64  
 10  pc             2000 non-null   int64  
 11  px_height      2000 non-null   int64  
 12  px_width       2000 non-null   int64  
 13  ram            2000 non-null   int64  
 14  sc_h           2000 non-null   int64  
 15  sc_w           2000 non-null   int64  
 16  talk_time      2000 non-null   int64  
 17  three_g        2000 non-null   int64  
 18  touch_screen   2000 non-null   int64  
 19  wifi           2000 non-null   int64  
 20  price_range    2000 non-null   int64  
dtypes: float64(2), int64(19)
memory usage: 328.2 KB
In [9]:
# Number of rows that exactly repeat an earlier row.
data.duplicated(keep="first").sum()
Out[9]:
0
In [10]:
# Column labels of the frame.
data.columns
Out[10]:
Index(['battery_power', 'blue', 'clock_speed', 'dual_sim', 'fc', 'four_g',
       'int_memory', 'm_dep', 'mobile_wt', 'n_cores', 'pc', 'px_height',
       'px_width', 'ram', 'sc_h', 'sc_w', 'talk_time', 'three_g',
       'touch_screen', 'wifi', 'price_range'],
      dtype='object')
In [11]:
# Visualization: exploratory plots of the features
In [12]:
# Bar chart with one bar per row: x position from `blue`, bar height from `fc`.
# Rows sharing the same `blue` value are drawn on top of each other.
plt.bar(x=data['blue'], height=data['fc'])
plt.xticks(rotation=90)  # vertical tick labels
plt.show()
In [13]:
# Interactive Plotly bar chart of `ram` against `int_memory`,
# with bars coloured by `int_memory`.
fig = px.bar(
    data_frame=data,
    x='int_memory',
    y='ram',
    color='int_memory',
)
fig.show()
In [14]:
# Scatter of `pc` against `mobile_wt`, one point per row.
plt.scatter(x=data['mobile_wt'], y=data['pc'], color='yellowgreen')
plt.xticks(rotation=90)  # vertical tick labels
plt.show()
In [15]:
# Number of rows for each distinct `clock_speed` value, on a 10x4 inch figure.
plt.figure(figsize=(10, 4))
sns.countplot(data=data, x='clock_speed', color='b')
plt.show()
In [16]:
# Horizontal count plot: number of rows for each `talk_time` value.
# All talk_time values are plotted; no top-N filtering is applied (the unused
# `top_car` value_counts result that used to be computed here never fed the
# plot and has been removed).
plt.figure(figsize=(10, 4))
sns.countplot(y=data.talk_time, color='red')
Out[16]:
<AxesSubplot:xlabel='count', ylabel='talk_time'>
In [17]:
# Line plot of `wifi` against `clock_speed`.
sns.lineplot(data=data, x='clock_speed', y='wifi')
Out[17]:
<AxesSubplot:xlabel='clock_speed', ylabel='wifi'>
In [18]:
# Bar plot of `sc_w` for each `sc_h` value.
# x and y are passed by keyword: seaborn deprecated positional x/y in 0.11 and
# made them keyword-only in 0.12, where the positional form raises TypeError.
sns.barplot(x='sc_h', y='sc_w', data=data, color='k')
plt.xticks(rotation=90)  # vertical tick labels
plt.show()
In [19]:
# Scatter of `px_width` against `px_height` on an 8x4 inch figure,
# with both axes labelled by their column names.
plt.figure(figsize=(8, 4))
pixel_ax = sns.scatterplot(x='px_height', y='px_width', data=data)
pixel_ax.set_xlabel('px_height')
pixel_ax.set_ylabel('px_width')
plt.show()
In [20]:
# Distribution plot of the `n_cores` column.
sns.displot(data=data["n_cores"])
Out[20]:
<seaborn.axisgrid.FacetGrid at 0x23b9b1b26d0>
In [21]:
# Number of rows for each `four_g` value.
sns.countplot(data=data, x='four_g')
Out[21]:
<AxesSubplot:xlabel='four_g', ylabel='count'>
In [22]:
# Box plot of `fc`, grouped by `four_g`.
sns.boxplot(data=data, x='four_g', y='fc')
Out[22]:
<AxesSubplot:xlabel='four_g', ylabel='fc'>
In [23]:
# Violin plot of `battery_power`, grouped by `blue`.
sns.violinplot(data=data, x='blue', y='battery_power')
Out[23]:
<AxesSubplot:xlabel='blue', ylabel='battery_power'>
In [24]:
# Model building: feature scaling, train/test split and SVM classifier
In [25]:
from sklearn.model_selection import train_test_split

# Target vector: the price_range column as a NumPy array.
y = data["price_range"].values
# Feature frame: every column except the target.
x_data = data.drop(["price_range"], axis=1)

# Min-max scale each feature column using its own minimum and range.
# Use the DataFrame's column-wise reductions (axis=0) rather than
# np.min/np.max: NumPy forwards axis=None to DataFrame.min/max, which on
# pandas >= 2.0 reduces over the whole frame and returns a single scalar, so
# every column would be scaled by the global range instead of its own.
col_min = x_data.min(axis=0)
col_range = x_data.max(axis=0) - col_min
# A constant column has zero range; divide by 1 so it maps to 0 instead of NaN.
col_range = col_range.replace(0, 1)
x = (x_data - col_min) / col_range

# NOTE(review): the scaling statistics are computed on all rows before the
# split, so the held-out rows influence the scaling. Consider computing
# col_min / col_range from the training rows only.

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=1
)
In [26]:
from sklearn.svm import SVC

# Fit a support-vector classifier (default hyper-parameters, fixed seed) on
# the training split; fit() returns the estimator itself.
svm = SVC(random_state=1).fit(x_train, y_train)

# Mean accuracy on the rows the model was fitted on and on the held-out rows.
train_accuracy = svm.score(x_train, y_train)
test_accuracy = svm.score(x_test, y_test)
print("train accuracy:", train_accuracy)
print("test accuracy:", test_accuracy)
train accuracy: 0.97125
test accuracy: 0.8375
In [ ]:
 
In [ ]: